package main

import (
	"github.com/PuerkitoBio/goquery"
	"golang.org/x/text/transform"
	"time"
	"strings"
	"net/http"
	"io/ioutil"
	"os"
	"path"
	"sort"
	"golang.org/x/text/encoding/simplifiedchinese"
	"bytes"
	"fmt"
	url2 "net/url"
	"html/template"
)

// fetchHtml runs the scraping job in an endless loop and never returns.
//
// Each cycle logs a start message, calls internalFetch with bookFile, logs a
// finish message and then sleeps for five minutes before the next cycle.
func fetchHtml(bookFile string) {
	const pause = 5 * time.Minute

	// One complete scraping pass, bracketed by its log lines.
	runOnce := func() {
		fmt.Println("抓取任务开始")
		internalFetch(bookFile)
		fmt.Println("抓取任务结束，开始睡觉")
	}

	for {
		runOnce()
		time.Sleep(pause)
	}
}

// bookArraySort is a list of book maps that implements sort.Interface.
// Books are ordered by the value getValue reports for them, largest first.
type bookArraySort []map[string]interface{}

// Len reports the number of books in the list.
func (b bookArraySort) Len() int { return len(b) }

// Less reports whether book i must sort before book j, i.e. whether
// getValue of book i is strictly greater than getValue of book j.
func (b bookArraySort) Less(i, j int) bool {
	return getValue(b[i]) > getValue(b[j])
}

// Swap exchanges the books at positions i and j.
func (b bookArraySort) Swap(i, j int) {
	tmp := b[i]
	b[i] = b[j]
	b[j] = tmp
}
// getValue returns the sort key of a book: the modification time (Unix
// seconds) of the file referenced by the "value" field of the first entry in
// data["items"], resolved relative to basedir.
//
// When data has no "items" entry, the entry is not a []map[string]string, the
// list is empty, or the file cannot be stat'ed, the sentinel 1<<32 is
// returned. The lookup uses a checked type assertion so that a book without
// an "items" key no longer panics while sorting.
func getValue(data map[string]interface{}) int64 {
	const unknown = int64(1 << 32)

	items, ok := data["items"].([]map[string]string)
	if !ok || len(items) == 0 {
		return unknown
	}

	// Only the metadata is needed, so there is no reason to open the file.
	stat, err := os.Stat(path.Join(basedir, items[0]["value"]))
	if err != nil {
		return unknown
	}
	return stat.ModTime().Unix()
}

// internalFetch performs one scraping pass.
//
// It reads bookPath, treats every trimmed line that starts with "http" as a
// book URL and calls fetchBook on it. Books that fail to fetch are logged and
// skipped. The remaining books are sorted with bookArraySort, rendered with
// the "index" template and written to index.html under basedir.
//
// Errors from reading the list, rendering or writing are printed; nothing is
// returned to the caller.
func internalFetch(bookPath string) {
	raw, readErr := ioutil.ReadFile(bookPath)
	if readErr != nil {
		fmt.Println(readErr)
		return
	}

	lines := strings.Split(string(raw), "\n")
	books := make(bookArraySort, 0, len(lines))
	for _, line := range lines {
		line = strings.TrimSpace(line)
		// Blank lines and anything that is not a URL are ignored.
		if !strings.HasPrefix(line, "http") {
			continue
		}
		info, fetchErr := fetchBook(line)
		if fetchErr != nil {
			fmt.Println(fetchErr)
			continue
		}
		books = append(books, info)
	}
	sort.Sort(books)

	page, renderErr := Render("index", map[string]interface{}{"items": books})
	if renderErr != nil {
		fmt.Println(renderErr)
		return
	}
	if writeErr := ioutil.WriteFile(path.Join(basedir, "index.html"), []byte(page), os.ModePerm); writeErr != nil {
		fmt.Println(writeErr)
	}
}

// fetchBook downloads the book index page at pageUrl, renders the local book
// index and schedules downloads for every chapter that is not on disk yet.
//
// The title is read from "#info h1" and the chapter links from "#list dd a".
// Each chapter becomes a {"key": title, "value": local path} entry; the list
// is stored in reverse document order. renderBookIndex fills book with
// "title", "url" and "items"; afterwards "items" is cut to the first five
// entries when there are more than five.
//
// An error is returned when the page cannot be fetched or parsed, when
// pageUrl is not a valid URL, or when the page contains no chapter links
// (previously this produced a book without "items", which made the sort in
// the caller panic).
//
// NOTE(review): one goroutine is started per missing chapter and nothing
// waits for them or limits their number.
func fetchBook(pageUrl string) (book map[string]interface{}, err error) {
	fmt.Println("开始抓取:" + pageUrl)
	content, err := httpGet(pageUrl)
	if err != nil {
		return nil, err
	}
	urlObj, err := url2.Parse(pageUrl)
	if err != nil {
		return nil, err
	}
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
	if err != nil {
		// doc is nil on failure; using it would dereference a nil pointer.
		return nil, err
	}
	title := doc.Find("#info h1").Text()
	fmt.Printf("正在抓取:%v,%v\n", title, pageUrl)

	// download describes one chapter that still has to be fetched.
	type download struct {
		isLast bool
		title  string
		url    string
		epath  string
	}

	links := doc.Find("#list dd a")
	total := links.Size()
	menus := make([]map[string]string, 0, total)
	var pending []download
	links.Each(func(i int, e *goquery.Selection) {
		href, ok := e.Attr("href")
		if !ok || href == "" {
			// An anchor without a target cannot be mapped to a chapter file.
			return
		}
		etitle := e.Text()
		eurl := resolveChapterURL(urlObj, href)
		epath := convertToPath(eurl)
		menus = append(menus, map[string]string{"key": etitle, "value": epath})

		// A chapter whose file cannot be stat'ed is treated as not downloaded.
		if _, statErr := os.Stat(path.Join(basedir, epath)); statErr != nil {
			pending = append(pending, download{
				isLast: i == total-1,
				title:  etitle,
				url:    eurl,
				epath:  epath,
			})
		}
	})
	if len(menus) == 0 {
		return nil, fmt.Errorf("no chapter links found on %s", pageUrl)
	}

	// Store the menu in reverse document order, last link first.
	for l, r := 0, len(menus)-1; l < r; l, r = l+1, r-1 {
		menus[l], menus[r] = menus[r], menus[l]
	}

	book = make(map[string]interface{})
	renderBookIndex(title, convertToPath(pageUrl), menus, book)
	if len(menus) > 5 {
		book["items"] = menus[:5]
	}

	// Start the downloads only after renderBookIndex has created the book
	// directory, so the chapter writers do not race with its creation.
	for _, d := range pending {
		fmt.Printf("正在抓取: %v,%v\n", d.title, d.url)
		go fetchBookDetail(d.isLast, d.title, d.url, d.epath)
	}
	return book, nil
}

// resolveChapterURL turns the href of a chapter link into an absolute URL.
//
// Hrefs starting with "http" are returned unchanged and root-relative hrefs
// ("/...") are appended to the scheme and host of base. Every other form
// (relative such as "123.html", or protocol-relative "//host/...") is
// resolved against base.
func resolveChapterURL(base *url2.URL, href string) string {
	switch {
	case strings.HasPrefix(href, "http"):
		return href
	case strings.HasPrefix(href, "/") && !strings.HasPrefix(href, "//"):
		return base.Scheme + "://" + base.Host + href
	}
	if ref, err := base.Parse(href); err == nil {
		return ref.String()
	}
	// Unparsable href: fall back to a plain root-relative join.
	return base.Scheme + "://" + base.Host + "/" + strings.TrimLeft(href, "/")
}

// fetchBookDetail downloads one chapter page from url, extracts the inner
// HTML of "#content", renders it with the "detail" template and writes the
// result to epath under basedir.
//
// When isLast is true and the content contains the text
// "章节内容正在手打中", nothing is written (presumably the site's placeholder
// for a chapter that is not published yet — confirm), so the chapter is
// fetched again on a later pass.
//
// The parent directory of the target file is created if needed; before, the
// write depended on renderBookIndex having created it already. Every failure
// is printed and ends the function; nothing is returned.
func fetchBookDetail(isLast bool, title, url, epath string) {
	content, err := httpGet(url)
	if err != nil {
		fmt.Println(err)
		return
	}
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
	if err != nil {
		fmt.Println(err)
		return
	}
	html, err := doc.Find("#content").Html()
	if err != nil {
		fmt.Println(err)
		return
	}
	if isLast && strings.Contains(html, "章节内容正在手打中") {
		return
	}

	tplContent, renderErr := Render("detail", map[string]interface{}{"title": title, "content": template.HTML(html)})
	if renderErr != nil {
		fmt.Println(renderErr)
		return
	}

	target := path.Join(basedir, epath)
	if err = os.MkdirAll(path.Dir(target), os.ModePerm); err != nil {
		fmt.Println(err)
		return
	}
	if err = ioutil.WriteFile(target, []byte(tplContent), os.ModePerm); err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println("抓取成功:", title)
}

// renderBookIndex fills book with "title", "url" (cpath) and "items" (menus),
// renders it with the "book" template and writes the page to cpath under
// basedir, creating the parent directory first.
//
// book is modified in place and the caller relies on the keys set here.
// Rendering, directory-creation and write errors are printed; nothing is
// returned.
func renderBookIndex(title, cpath string, menus []map[string]string, book map[string]interface{}) {
	book["title"] = title
	book["url"] = cpath
	book["items"] = menus

	content, err := Render("book", book)
	if err != nil {
		fmt.Println(err)
		return
	}

	target := path.Join(basedir, cpath)
	// Report a failed directory creation directly instead of letting the
	// write below fail with a less specific error.
	if mkErr := os.MkdirAll(path.Dir(target), os.ModePerm); mkErr != nil {
		fmt.Println(mkErr)
		return
	}
	if writeErr := ioutil.WriteFile(target, []byte(content), os.ModePerm); writeErr != nil {
		fmt.Println(writeErr)
	}
}

// httpGet fetches url with a browser-like User-Agent and returns the response
// body decoded from GBK to a UTF-8 string.
//
// The body is always decoded as GBK, whatever charset the server declares.
//
// An error is returned when the request cannot be built or sent, when it
// takes longer than 30 seconds, when the server answers with a non-2xx
// status, or when reading or decoding the body fails. The response body is
// closed before returning.
func httpGet(url string) (content string, err error) {
	const requestTimeout = 30 * time.Second

	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return "", err
	}
	req.Header.Add("User-Agent", `Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36`)

	// A nil Transport means http.DefaultTransport, so connections are still
	// pooled across calls; only the overall deadline is added here.
	client := http.Client{Timeout: requestTimeout}
	resp, err := client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	// Do not hand error pages to the caller as if they were real content.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return "", fmt.Errorf("GET %s: unexpected status %s", url, resp.Status)
	}

	buffer, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	reader := transform.NewReader(bytes.NewReader(buffer), simplifiedchinese.GBK.NewDecoder())
	b, err := ioutil.ReadAll(reader)
	if err != nil {
		return "", err
	}
	return string(b), nil
}
// convertToPath maps a page URL to the slash-separated path under which the
// page is stored locally.
//
// The path is everything after the first "/html" in url. When url contains no
// "/html", the path component of the parsed URL is used instead (this case
// used to panic). A path ending in "/" gets "index.html" appended.
//
// "." and ".." segments are resolved as if the path were rooted, so a crafted
// link cannot point outside the directory the result is later joined to. A
// leading "/" is kept only if the extracted path had one. An empty path is
// returned unchanged.
func convertToPath(url string) string {
	const marker = "/html"

	subURL := url
	if idx := strings.Index(url, marker); idx >= 0 {
		// Keep the whole remainder; later "/html" occurrences belong to the
		// path and must not cut it short.
		subURL = url[idx+len(marker):]
	} else if parsed, err := url2.Parse(url); err == nil {
		subURL = parsed.Path
	}

	if strings.HasSuffix(subURL, "/") {
		subURL += "index.html"
	}
	if subURL == "" {
		return subURL
	}

	cleaned := path.Clean("/" + subURL)
	if !strings.HasPrefix(subURL, "/") {
		cleaned = strings.TrimPrefix(cleaned, "/")
	}
	return cleaned
}
